/* 
   This program imports the CART imputation .csv files for the FHS industries (excluding concrete) into SAS,
   merges in the survu_id and the zero-flag variables, and saves the merged files.

   input files:
	XXX_imputes.csv
	XXX_predicted.csv

	where XXX is one of the FHS industries: boxes, bread, carbon,
          coffee, floor (hardwood flooring), gas, ice, plywood, or sugar.

   output files:
	fhs.XXX_imputes

	where XXX is one of the FHS industries: boxes, bread, carbon,
          coffee, floor (hardwood flooring), gas, ice, plywood, or sugar.
*/

%include "ASMimplibs.sas";



/*
   import_imputes: read the CART imputation .csv file for one industry.

   Parameter:
     ind - industry prefix. The macro reads "&ind._imputes.csv" (the file name is
           given without a directory) and creates WORK.&ind._imputes.

   The first line of the file is skipped (firstobs=2). number, Year and exit02
   are read as character; every other column is read as numeric.

   _EFIERR_ is set to 0 before the step and to 1 if any record sets _ERROR_.
   A WARNING is written to the log in that case so that a bad read is not
   silently carried into the later merge.
*/
%MACRO import_imputes(ind);

            %let _EFIERR_ = 0; /* set the ERROR detection macro variable */

            data WORK.&ind._imputes;
            infile "&ind._imputes.csv" delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;

               /* The order of the variables here fixes their order in the data set. */
               informat number    $5.
                        Year      $4.
                        PV PQS TAB EE CF CM PW PH SW WW TVS best32.
                        exit02    $1.
                        nonpw impsetnum best32. ;

               format   number    $5.
                        Year      $4.
                        PV PQS TAB EE CF CM PW PH SW WW TVS best12.
                        exit02    $1.
                        nonpw impsetnum best12. ;

            input
                        number $
                        Year $
                        PV
                        PQS
                        TAB
                        EE
                        CF
                        CM
                        PW
                        PH
                        SW
                        WW
                        TVS
                        exit02 $
                        nonpw
                        impsetnum
            ;
            if _ERROR_ then call symputx('_EFIERR_',1);  /* set ERROR detection macro variable */
            run;

            /* Report a flagged read instead of discarding the flag. */
            %if &_EFIERR_ %then
               %put WARNING: import_imputes(&ind): at least one record of &ind._imputes.csv set _ERROR_ while being read - review the notes above.;

%MEND ;

/* Import step. Only ice is imported in this run.

   The calls for the other industries are disabled with block comments rather
   than asterisk-style comments: a macro call that follows an asterisk is still
   expanded by the macro processor, and only the first statement it generates
   is treated as a comment, so the rest of the macro would run in open code.

   import_imputes takes a single parameter (the industry), so the two-argument
   calls kept below must be reduced to one argument before they are re-enabled. */

/*
%import_imputes(gas,02);
%import_imputes(gas,07);
%import_imputes(bread);
%import_imputes(carbon);
%import_imputes(coffee);
%import_imputes(floor);
*/
%import_imputes(ice);
/*
%import_imputes(ice);

%import_imputes(plywood,02);
%import_imputes(sugar,02);
%import_imputes(boxes,02);
*/


/*
   merge_ids: attach survu_id and the zero flags to the imported CART imputes.

   Parameters:
     ind  - industry prefix used to build every data set name.
     year - accepted so that existing two-argument calls keep working, but not
            referenced: the year-suffixed data set names are no longer used
            (formerly &ind._imputes&year and fhs.&ind._gooddata_ids&year).

   Reads:   WORK.&ind._imputes (with character NUMBER and year),
            fhs.&ind._gooddata_ids
   Writes:  WORK.&ind._ids, WORK.&ind._imputes_after_merge, and
            WORK.&ind._imputes, which is REPLACED by the merged result.
            Because the input is overwritten, re-run the import before calling
            this macro a second time for the same industry.
*/
%MACRO merge_ids(ind,year);

  /* Convert the character keys to numeric. KEEP= is applied before RENAME=, so
     the character NUMBER and year are dropped and the numeric versions take
     over their names in the same step. */
  data &ind._imputes (keep   = NUMBER_NUM year_num pv pqs tab ee cf cm pw ph sw ww tvs exit02 nonpw impsetnum
                      rename = (NUMBER_NUM = NUMBER  year_num = year));
   set &ind._imputes;
   NUMBER_NUM = input(NUMBER,5.);
   year_num   = input(year,4.);
  run;

  PROC SORT DATA=&ind._imputes;
   by NUMBER;
  RUN;

  /** Merge survu_id and zeros flags **/

  data &ind._ids (keep = number survu_id tvs_zerof cm_zerof cf_zerof ee_zerof tab_zerof ph_zerof ww_zerof pw_zerof);
   set fhs.&ind._gooddata_ids;
  run;

  proc sort data=&ind._ids;
   by number;
  run;

  /* Keep only the records present on both sides. */
  data &ind._imputes_after_merge;
   merge &ind._ids (in=inids) &ind._imputes (in=inimp);
   by NUMBER;
   if inids and inimp;
  run;

  /* In cases where we replaced zeroes with 1's before running the CART scripts,
     replace the 1's with the original zeros. This is done in its own step,
     after the merge, and the imputation-set counter is renamed to _IMPUTE_
     on the way out. */
  data &ind._imputes_after_merge (rename = (impsetnum = _IMPUTE_));
   set &ind._imputes_after_merge;
   if tvs_zerof = 1 then tvs = 0;
   if cm_zerof  = 1 then cm  = 0;
   if cf_zerof  = 1 then cf  = 0;
   if ee_zerof  = 1 then ee  = 0;
   if tab_zerof = 1 then tab = 0;
   if ph_zerof  = 1 then ph  = 0;
   if ww_zerof  = 1 then ww  = 0;
   if pw_zerof  = 1 then pw  = 0;
  run;

  /* Hand the merged result back under the original name. */
  data &ind._imputes;
   set &ind._imputes_after_merge;
  run;

%MEND;


/* Merge step. Only ice is processed in this run.

   The calls for the other industries are disabled with block comments rather
   than asterisk-style comments: a macro call that follows an asterisk is still
   expanded by the macro processor, and only the first statement it generates
   is treated as a comment, so the remaining steps of merge_ids would run. */

/*
%merge_ids(gas,02);
%merge_ids(gas,07);
%merge_ids(bread);
%merge_ids(carbon);
%merge_ids(coffee);
%merge_ids(floor);
*/

%merge_ids(ice,02);

/*
%merge_ids(ice,07);
%merge_ids(plywood,02);
%merge_ids(sugar,02);
%merge_ids(boxes,02);
*/

/* Save the merged ice imputes to the permanent library. */
data fhs.ice_imputes;
  set ice_imputes;
run;

  /* Check that the survu_id's were matched up correctly by comparing the
     saved imputes with ppsr50.icef on year and survu_id.
     Note: rejCBice should be empty. */

  /* Key list from ppsr50.icef, ordered for the merge. */
  proc sort data=ppsr50.icef (keep = survu_id year) out=CBice;
    by year survu_id;
  run;

  proc sort data=fhs.ice_imputes;
    by year survu_id;
  run;

  /* testice  : keys found in both data sets
     rejCBice : keys found in CBice only
     Records found only in the imputes are not written anywhere. */
  data testice rejCBice;
    merge fhs.ice_imputes (in=inCART) CBice (in=inCB);
    by year survu_id;
    if inCB then do;
      if inCART then output testice;
      else output rejCBice;
    end;
  run;


/**data fhs.gas_imputes; set gas_imputes02 gas_imputes07; run;
data fhs.bread_imputes; set bread_imputes02 bread_imputes07; run;
data fhs.carbon_imputes; set carbon_imputes02 carbon_imputes07; run;
data fhs.coffee_imputes; set coffee_imputes02 coffee_imputes07; run;
data fhs.floor_imputes; set floor_imputes02 floor_imputes07; run;
data fhs.plywood_imputes; set plywood_imputes02; run;
data fhs.sugar_imputes; set sugar_imputes02 ; run;
data fhs.boxes_imputes; set boxes_imputes02 ; run;
**/
